import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
import glob
import os
import re
import pandas as pd
import tarfile
from sklearn.decomposition import PCA
import P4J
%matplotlib inline
plt.rcParams['figure.figsize'] = (9, 6)
sns.set(style="whitegrid", color_codes=True, context="poster")
mainpath = '/Users/jorgetil/Astro/HITS'
def give_me_lc(field, CCD, X, Y, extract=False):
year = field[:-3]
try:
tar = tarfile.open("%s/lightcurves/%s/%s/%s/%s_%s_gr_LC_50.tar.gz"
% (mainpath, year, field, CCD, field, CCD))
fil = tar.extractfile('%s_%s_%s_%s_g.dat' % (field, CCD, X, Y))
if extract:
tar.extract('%s_%s_%s_%s_g.dat' % (field, CCD, X, Y),
path='/Users/jorgetil/Astro/HITS/lightcurves/samples/.')
except:
print 'No tar file or element in tar file'
return None
time, mag, err = [], [], []
for line in fil:
if line[0] == '#': continue
values = line.split()
time.append(float(values[1]))
mag.append(float(values[2]))
err.append(float(values[3]))
time = np.asarray(time)
mag= np.asarray(mag)
err = np.asarray(err)
try:
fil = tar.extractfile('%s_%s_%s_%s_r.dat' % (field, CCD, X, Y))
#tar.extract('%s_%s_%s_%s_r.dat' % (field, CCD, X, Y)
# , path='/Users/jorgetil/Downloads/.')
time2, mag2, err2 = [], [], []
for line in fil:
if line[0] == '#': continue
values = line.split()
time2.append(float(values[1]))
mag2.append(float(values[2]))
err2.append(float(values[3]))
time2 = np.asarray(time2)
mag2 = np.asarray(mag2)
err2 = np.asarray(err2)
return time, mag, err, time2, mag2, err2
except:
print 'No lightcurve for other filter'
return time, mag, err, None, None, None
# load feature table into DF
#table_file = '%s/tables/Blind14A_tables+feat_pl_var_type_spCL_spClass.csv' % (mainpath)
table_file = '%s/tables/Blind15A_label_sample.csv' % (mainpath)
table_15 = pd.read_csv(table_file)
table_15.set_index('ID', inplace=True)
table_15.info()
plt.plot(table_15.PeriodLS, table_15.WMCC_Period, 'b.')
plt.xlim(0,15)
plt.xlim(0,15)
plt.show()
rrl_ = table_15.query('Var_Type == "RRLYR"')
cep_ = table_15.query('Var_Type == "CEP"')
ebs_ = table_15.query('Var_Type == "EB"')
plt.plot(rrl_.PeriodLS, rrl_.Amplitude, '.b', label='RRLYR')
plt.plot(cep_.PeriodLS, cep_.Amplitude, '.r', label='CEP')
plt.plot(ebs_.PeriodLS, ebs_.Amplitude, '.g', label='EB')
plt.legend(loc='best')
plt.xscale('log')
plt.show()
WCMM_new = []
WCMM_old = []
ls = []
periodogram = True
#for idx, row in table_15.query('WMCC_conf >= 0.98 and PeriodLS >= 1.5').head(50).iterrows():
for idx, row in table_15.query('Var_Type == "CEP"').iterrows():
print '\r',idx
label = row['Var_Type']
print row[['Var_Type','FLUX_RADIUS','ELLIPTICITY','FWHM',
'FLAGS','Mean','Median_g',
'Std','MedianAbsDev','Meanvariance','Eta_e',
'Pred_Var_Type','Prob_Pred','PeriodLS',
'Period_fit', 'WMCC_Period', 'WMCC_conf']]
T = float(row['PeriodLS'])
field, CCD, X, Y = re.findall(
r'(\w+\d+\w?\_\d\d?)\_(\w\d+?)\_(\d+)\_(\d+)', idx)[0]
try:
time, mag, err, time2, mag2, err2 = give_me_lc(field, CCD, X, Y,extract=False)
except:
print 'fail during LC read'
continue
if periodogram:
M = 1
WMCC_model = P4J.periodogram(M=M, method='WMCC')
WMCC_model.fit(time, mag, err)
freq, per = WMCC_model.grid_search(fmin=1/30.0, fmax=1/.01, fres_coarse=2,
fres_fine=0.05, n_local_max=10)
fbest = WMCC_model.get_best_frequency()
WMCC_model.fit_extreme_cdf(n_bootstrap=40, n_frequencies=40)
falsa_alarm = np.asarray([0.05, 0.01, 0.001])
per_levels = WMCC_model.get_FAP(falsa_alarm)
confidence_best_freq = WMCC_model.get_confidence(fbest[1])
print 1/fbest[0]
fig = plt.figure(figsize=(15,14))
gs = gridspec.GridSpec(3,2)
ax1 = fig.add_subplot(gs[0,:])
ax2 = fig.add_subplot(gs[1,0])
ax3 = fig.add_subplot(gs[1,1])
ax4 = fig.add_subplot(gs[2,1])
ax5 = fig.add_subplot(gs[2,0])
# plot of periodogram from WMCC
ax1.plot(freq, per, 'k-', linewidth=1)
ax1.axvline(1/T, ls=':', c='c', linewidth=4, alpha=0.9)
ax1.axvline(2/T, ls=':', c='y', linewidth=4, alpha=0.9)
# Print confidence bars
xmin, xmax = ax1.get_xlim()
for i in range(0, len(falsa_alarm)):
ax1.axhline(per_levels[i], ls='--', c='b', linewidth=4, alpha=0.5)
ax1.annotate('%0.3f' % (1.0-falsa_alarm[i]), xy=(xmin+0.01*(xmax-xmin),
per_levels[i]), fontsize=12)
# Print max of periodogram
ymin, ymax = ax1.get_ylim()
ax1.axvline(fbest[0], ls='--', c='r', linewidth=4, alpha=0.7)
ax1.set_ylim([ymin, ymax])
ax1.set_xlabel('Frequency [1/MJD]', fontsize=15)
ax1.set_ylabel('Periodogram', fontsize=15)
ax1.annotate(' %0.3f' % (confidence_best_freq),
xy=(fbest[0], fbest[1]) , fontsize=12)
# LC
ax2.errorbar(time, mag, yerr=err, fmt='b*')
ax2.set_xlabel('time')
# phase LC from LS
phase1 = np.mod(time, T) / T
sort_idx1 = np.argsort(phase1)
PHASE = np.hstack([phase1[sort_idx1], phase1[sort_idx1]+1.])
MAG = np.hstack([mag[sort_idx1],mag[sort_idx1]])
ERR = np.hstack([err[sort_idx1],err[sort_idx1]])
ax3.errorbar(PHASE-1, MAG, yerr=ERR, fmt='g*', label='LS')
ax3.legend(loc='best')
#if label == 'RRLYR' or label == 'EB' or label == 'CEP' or label=='BE':
T_C = float(row['WMCC_Period'])
t_c = 1/fbest[0]
ls.append(T)
WCMM_old.append(T_C)
WCMM_new.append(t_c)
print 'WMCC/LS = ', T_C/T
print 'OLD WMCC period M=3', T_C
print 'NEW WMCC period M=%i' % (M), t_c
phase_C = np.mod(time, T_C) / T_C
sort_idx_C = np.argsort(phase_C)
PHASE_C = np.hstack([phase_C[sort_idx_C], phase_C[sort_idx_C]+1.])
MAG_C = np.hstack([mag[sort_idx_C],mag[sort_idx_C]])
ERR_C = np.hstack([err[sort_idx_C],err[sort_idx_C]])
ax4.errorbar(PHASE_C-1, MAG_C, yerr=ERR_C, fmt='b*', alpha=.7, label='WMCC_old')
ax4.set_xlabel('phase', fontsize=15)
ax4.legend(loc='best')
phase_cc = np.mod(time, t_c) / t_c
sort_idx_cc = np.argsort(phase_cc)
PHASE_cc = np.hstack([phase_cc[sort_idx_cc], phase_cc[sort_idx_cc]+1.])
MAG_cc = np.hstack([mag[sort_idx_cc],mag[sort_idx_cc]])
ERR_cc = np.hstack([err[sort_idx_cc],err[sort_idx_cc]])
ax5.errorbar(PHASE_cc-1, MAG_cc, yerr=ERR_cc, fmt='b*', alpha=.7, label='WMCC_new')
ax5.set_xlabel('phase', fontsize=15)
ax5.legend(loc='best')
ax2.set_ylabel('g', fontsize=15)
ax2.invert_yaxis()
ax3.invert_yaxis()
ax4.invert_yaxis()
ax5.invert_yaxis()
plt.show()
else:
plt.errorbar(time, mag, yerr=err, fmt='b*')
plt.xlabel('time')
plt.ylabel('g')
plt.gca().invert_yaxis()
plt.show()
plt.hist(table_15.query('Var_Type=="CEP"').Amplitude, bins=15, alpha=.5)
plt.hist(table_15.query('Var_Type=="RRLYR"').Amplitude, bins=25, alpha=.5)
plt.show()
cep_ = table_15.query('Var_Type == "CEP"')
rrl_ = table_15.query('Var_Type == "RRLYR"')
ebs_ = table_15.query('Var_Type == "EB"')
plt.plot(table_15.Color, table_15.Mean, '.b')
plt.plot(cep_.Color, cep_.Mean, '*r')
plt.plot(rrl_.Color, rrl_.Mean, '*g')
#plt.plot(ebs_.Color, ebs_.Mean, '*c')
plt.xlim(-3,3)
plt.ylim(15,25)
plt.show()
table_15.loc[table_15.query('Std<0.01 and Meanvariance<0.01 and MedianAbsDev<0.01').sample(95).index, 'Var_Type'] = 'NV'
table_15.info()
#table_15.to_csv('%s/tables/__Blind15A_tables+feat_filter_type_sub_sp.csv'
# % (mainpath))